********************************************************************************
* Prepares data for two-way fixed effects (AKM) model estimation.
* Then calls MATLAB file VALUE_2_AKM.m to run the actual estimation.
* Then processes the output and stores final estimation results.
*
********************************************************************************
*** prepare
* open a fresh named log, closing any log of the same name left open by an earlier run
capture log close akm
log using "${LOG_DIR}/log_akm_${ext}.log", replace text name(akm)


*** read
* hire_month and sep_month are not used below; they are loaded only so that id_unique matches the one built in VALUE_1_CONNECTED
do "${DO_DIR}/FUN_LOAD.do" ${year_min} ${year_max} "${vars_list} hire_month sep_month id_unique" // "${vars_list}" is defined in VALUE_0_MASTER.do
drop hire_month sep_month

* replace id_unique by a job identifier that is unique across years (one group per year x id_unique cell)
egen double job_year_group = group(year id_unique)
capture recast double id_unique // widen storage so the group index fits; ignored if the recast is not possible
replace id_unique = job_year_group
drop job_year_group

*** clean appended data
* drop optional controls that are switched off in this AKM specification
if !${akm_occ} {
	drop occ02_6
}
if !${akm_tenure} {
	drop tenure
}
if !${akm_exp_act} {
	drop exp_act
}

* drop variables that the AKM analysis never uses
drop hours_year ind07_5 muni

* keep only observations whose unique job ID is in the connected set for AKM (selection criteria are imposed in VALUE_1_CONNECTED.do)
* argument of FUN_MERGE_CONNECTED.do: 1 = connected set of firm IDs for AKM; 2 = connected set of firm IDs for PageRank; 3 = both connected sets of firm IDs; 4 = both connected sets of firm IDs in one step (faster); 5 = connected set of unique observation IDs (worker-firm-year-job) for AKM; 6 = connected set of unique observation IDs (worker-firm-year-job) for PageRank
do "${DO_DIR}/FUN_MERGE_CONNECTED.do" 5
drop id_unique

* convert income to natural logarithm (this becomes the dependent variable of the AKM model)
if ${akm_hourly_wage} {
	replace ${income_var} = ln(${income_var}/(hours*4.34812))
	label variable ${income_var} "Mean hourly earnings"
}
else {
	replace ${income_var} = ln(${income_var})
	label variable ${income_var} "Mean monthly earnings"
}
if !${akm_hours} {
	drop hours
}

* create potential experience (= years of age - years of education - 6), needed only as a cap for actual experience and tenure
if ${akm_exp_act} | ${akm_tenure} {
	* education codes: 1 "Illiterate (0 years)" 2 "Some primary school (1-5 years)" 3 "Primary school degree (5 years)" 4 "Some middle school (6-9 years)" 5 "Middle school degree (9 years)" 6 "Some high school (10-12 years)" 7 "High school degree (12 years)" 8 "Some college (13-15 years)" 9 "Bachelor's or higher degree (16+ years)"
	recode edu (1=0) (2=3) (3=5) (4=7) (5=9) (6=11) (7=12) (8=14) (9=16) (nonmissing=.), generate(edu_y)
	label variable edu_y "Years of education"
	generate byte exp_pot = age - edu_y - 6
	label variable exp_pot "Potential experience (years of age - years of education - 6)"
	drop edu_y
	* bound potential experience between 0 and age - 6
	replace exp_pot = max(min(exp_pot, age - 6), 0)
}

* actual experience: months to completed years, capped at potential experience
if ${akm_exp_act} {
	replace exp_act = min(floor(exp_act/12), exp_pot)
	label variable exp_act "Actual experience (years in formal sector)"
}

* tenure: months to completed years, capped at potential experience (and at actual experience when that is available)
if ${akm_tenure} {
	if ${akm_exp_act} {
		replace tenure = min(floor(tenure/12), exp_pot, exp_act)
	}
	else {
		replace tenure = min(floor(tenure/12), exp_pot)
	}
	label variable tenure "Tenure (years at current employer)"
}

* potential experience is collinear with year FE and person FE, so it is not kept
if ${akm_exp_act} | ${akm_tenure} {
	drop exp_pot
}


*** prepare data to be used for AKM estimation in MATLAB
* give the dependent variable and the employer ID the fixed names used from here on
rename ${income_var} inc
rename ${empid_var} empid

* re-index year so that it starts from 1 (format read by MATLAB)
replace year = year - ${year_min} + 1
rename year year_akm
label variable year_akm "Year (AKM format)"

* optional export columns: each local stays empty unless the corresponding switch is on
* age is exported if age terms are included, edu if year/age effects are interacted with education, and the category variable if AKM is run separately by category
local age_outsheet ""
local edu_outsheet ""
local categ_outsheet ""
if ${age_poly_order} local age_outsheet "age"
if ${edu_inter} local edu_outsheet "edu"
if ${akm_by_categ} local categ_outsheet "${categ_var}"

* check that every remaining variable is nonmissing in every observation
summarize, separator(0)
foreach v of varlist _all {
	assert `v' < .
}

* coarsen the optional control variables that are switched on
if ${akm_coarsen} {
	if ${akm_hours} {
		prog_coarsen "hours" ${N_coarsen}
	}
	if ${akm_occ} {
		prog_coarsen "occ02_6" ${N_coarsen}
	}
	if ${akm_tenure} {
		prog_coarsen "tenure" ${N_coarsen}
	}
	if ${akm_exp_act} {
		prog_coarsen "exp_act" ${N_coarsen}
	}
}

* save
* names of the optional controls to export; each local stays empty unless its switch is on
local hours_outsheet ""
local occ_outsheet ""
local tenure_outsheet ""
local exp_act_outsheet ""
if ${akm_hours} local hours_outsheet "hours"
if ${akm_occ} local occ_outsheet "occ02_6"
if ${akm_tenure} local tenure_outsheet "tenure"
if ${akm_exp_act} local exp_act_outsheet "exp_act"

* sort by worker and year, put the export columns first, and store the temp file used by the estimation loop
sort persid year_akm
order inc persid empid year_akm `age_outsheet' `edu_outsheet' `categ_outsheet' `hours_outsheet' `occ_outsheet' `tenure_outsheet' `exp_act_outsheet'
compress
prog_desc_sum_comp_save "${TEMP_DIR}/temp_akm_${year_min}_${year_max}_${ext}.dta"


*** run AKM estimation in MATLAB
// one pass for a pooled estimation; two passes (g = 1, 2) when the AKM model is estimated separately by category
forval g_loop = 0/$akm_by_categ {
	local g = `g_loop' + 1
	if $akm_by_categ {
		disp "* select observations for ${categ_var} = `g' (1 = men/nohsch, 2 = women/hsch)"
		// pass 1 subsets the data in memory; pass 2 reloads the other category from the temp file
		if `g' == 1 keep if ${categ_var} == 1
		else if `g' == 2 use if ${categ_var} == 2 using "${TEMP_DIR}/temp_akm_${year_min}_${year_max}_${ext}.dta", clear
	}
	
	disp "* format variables for file export"
	local vars_format = "inc persid empid year_akm"
	if $age_poly_order local vars_format = "`vars_format' age"
	if $edu_inter local vars_format = "`vars_format' edu"
	if $akm_hours local vars_format = "`vars_format' hours"
	if $akm_occ local vars_format = "`vars_format' occ02_6"
	if $akm_tenure local vars_format = "`vars_format' tenure"
	if $akm_exp_act local vars_format = "`vars_format' exp_act"
	foreach var of local vars_format {
		sum `var', meanonly
		// number of integer digits of the largest absolute value; max() skips the missing log10(0)
		// for the integer-valued columns the width is floored at 1, because the digit count is 0 when all values are at most 1 in absolute value (e.g. year_akm in a single-year sample) and missing when all values are 0
		local int_digits = ceil(max(log10(abs(r(min))), log10(abs(r(max)))))
		if "`var'" == "inc" format `var' %`=`int_digits' + 6'.6f
		else format `var' %`=max(1, `int_digits')'.0f
	}
	
	disp "* outsheet list of wages, worker IDs, employer IDs, year IDs (and possibly age)"
	sum `categ_outsheet' inc persid empid year_akm `age_outsheet' `edu_outsheet' `hours_outsheet' `occ_outsheet' `tenure_outsheet' `exp_act_outsheet', sep(0)
	outsheet inc persid empid year_akm `age_outsheet' `edu_outsheet' `hours_outsheet' `occ_outsheet' `tenure_outsheet' `exp_act_outsheet' using "${TEMP_DIR}/tomatlab_${year_min}_${year_max}_${ext}.csv", nonames nolabel replace // Note: in the future, replace -outsheet- command with -export delim- command.
	
	disp "* delete earlier output so as not to cause confusion"
	cap rm "${TEMP_DIR}/tostata_${year_min}_${year_max}_${ext}.txt"

	disp "* call MATLAB via shell"

	// EXPORT PARAMETERS TO BE READ FROM MATLAB
	// a one-row dataset holding the run settings is written to parameters_akm.csv
	clear
	set obs 1
	gen int year_min = ${year_min}
	gen int year_max = ${year_max}
	gen byte age_poly_order = ${age_poly_order}
	gen int age_flat_min = ${age_flat_min}
	gen int age_flat_max = ${age_flat_max}
	gen int age_norm = ${age_norm}
	gen int age_min = ${age_min}
	gen int age_max = ${age_max}
	gen byte edu_inter = ${edu_inter}
	gen byte akm_hours = ${akm_hours}
	gen byte akm_occ = ${akm_occ}
	gen byte akm_tenure = ${akm_tenure}
	gen byte akm_exp_act = ${akm_exp_act}
	gen double ext = ${ext}
	format year_min year_max %4.0f
	format age_poly_order edu_inter akm_hours akm_occ akm_tenure akm_exp_act %1.0f
	format age_flat_min age_flat_max age_norm age_min age_max %3.0f
	format ext %12.0f
	// wait until no parameters file exists before writing a new one (the outsheet below has no replace option)
	// NOTE(review): presumably the file is removed by whichever process consumes it -- confirm against VALUE_2_AKM.m
	local parameters_exist = 1
	cap confirm file "${TEMP_DIR}/parameters_akm.csv"
	if !_rc disp as error "USER WARNING: Parameters file (${TEMP_DIR}/parameters_akm.csv) already exists -- entering sleep loop."
	while `parameters_exist' {
		cap confirm file "${TEMP_DIR}/parameters_akm.csv"
		local parameters_exist = !_rc
		if `parameters_exist' sleep 60000 // sleep for 60s
	}
	compress
	outsheet year_min year_max age_poly_order age_flat_min age_flat_max age_norm age_min age_max edu_inter akm_hours akm_occ akm_tenure akm_exp_act ext using "${TEMP_DIR}/parameters_akm.csv", nonames nolabel // Note: in the future, replace -outsheet- command with -export delim- command.
	clear
	!"${MATLABPATH}" -nojvm -r "run ${DO_DIR}/VALUE_2_AKM.m"
	// poll every 10s until the stop-flag file appears, then remove it
	// NOTE(review): presumably the flag is written by the MATLAB script on completion; there is no timeout, so a failed MATLAB run leaves this loop waiting indefinitely
	cap confirm file "${TEMP_DIR}/stopakm_${ext}.txt"
	while _rc {
		sleep 10000
		cap confirm file "${TEMP_DIR}/stopakm_${ext}.txt"
	}
	cap rm "${TEMP_DIR}/stopakm_${ext}.txt"

	disp "* read MATLAB output"
	import delim using "${TEMP_DIR}/tostata_${year_min}_${year_max}_${ext}.txt", asdouble varnames(1) delim(tab) clear
	label var persid "Worker ID (deidentified)"
	rename y year
	label var year "Year"
	label var pe "Predicted AKM worker fixed effect"
	rename xb_y xb_year
	// employer type used in the employer-FE label; generic fallback for any other employer ID variable
	if "${empid_var}" == "empid_est" local emp_type = "establishment"
	else if "${empid_var}" == "empid_firm" local emp_type = "firm"
	else local emp_type = "employer"
	if $age_poly_order rename xb_a xb_age
	if $akm_hours rename xb_h xb_hours
	if $akm_occ rename xb_o xb_occ
	if $akm_tenure rename xb_ten xb_tenure
	if $akm_exp_act rename xb_exp xb_exp_act
	if $akm_by_categ local categ_prefix = "${categ_var}-"
	else local categ_prefix = ""
	label var fe "Predicted AKM `categ_prefix'`emp_type' FE"
	if !$edu_inter label var xb_year "Predicted AKM `categ_prefix'year FE"
	else label var xb_year "Predicted AKM `categ_prefix'education-year FE"
	if $age_poly_order == 1 {
		if !$edu_inter label var xb_age "Predicted AKM `categ_prefix'age FE"
		else label var xb_age "Predicted AKM `categ_prefix'education-age FE"
	}
	else if $age_poly_order >= 2 {
		if !$edu_inter label var xb_age "Predicted AKM higher-order `categ_prefix'age terms"
		else label var xb_age "Predicted AKM higher-order `categ_prefix'education-age terms"
	}
	if $akm_hours label var xb_hours "Predicted AKM `categ_prefix'hours FE"
	if $akm_occ label var xb_occ "Predicted AKM `categ_prefix'occupation FE"
	if $akm_tenure label var xb_tenure "Predicted AKM `categ_prefix'tenure FE"
	if $akm_exp_act label var xb_exp_act "Predicted AKM `categ_prefix'actual-experience FE"
	
	disp "* delete old data files used in AKM estimation"
	sleep 10000
	rm "${TEMP_DIR}/tomatlab_${year_min}_${year_max}_${ext}.csv"
	rm "${TEMP_DIR}/tostata_${year_min}_${year_max}_${ext}.txt"
	
	disp "* add other variables from temp file"
	rename year year_akm
	merge 1:1 persid year_akm using "${TEMP_DIR}/temp_akm_${year_min}_${year_max}_${ext}.dta", keep(match master) nogen
	// the temp file is deleted only after the last pass
	if `g' - 1 == $akm_by_categ rm "${TEMP_DIR}/temp_akm_${year_min}_${year_max}_${ext}.dta"
	// undo the year re-indexing and the renames applied before the export
	replace year_akm = year_akm + ${year_min} - 1
	rename year_akm year
	rename empid ${empid_var}
	rename inc ${income_var}
	
	// by-category runs: park the results of pass 1 on disk and append them to the results of pass 2
	if $akm_by_categ {
		if `g' == 1 prog_desc_sum_comp_save "${TEMP_DIR}/lset_g1_${year_min}_${year_max}_${ext}.dta"
		else if `g' == 2 {
			append using "${TEMP_DIR}/lset_g1_${year_min}_${year_max}_${ext}.dta"
			rm "${TEMP_DIR}/lset_g1_${year_min}_${year_max}_${ext}.dta"
		}
	}
}


*** save
* file-name suffix: empty for the default specification, otherwise the numeric extension
local ext_str ""
if !${akm_default} local ext_str "_${ext}"

* file-name infix marking estimates that are specific to ${categ_var}
local categ_ext ""
if ${akm_by_categ} local categ_ext "_${categ_var}"

* store the largest connected set together with the AKM estimates
sort year persid
compress
prog_desc_sum_comp_save "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta"


*** normalize AKM ${categ_var}-firm FEs to be mean zero in selected industries -- move to separate file, or the original AKM file?!
* stack employer IDs and industry codes from the yearly clean files
clear
forvalues yr = ${year_min}/${year_max} {
	append using "${WRITE_DIR}/`yr'/${sample_prefix}clean`yr'.dta", keep(${empid_var} ind07_5)
}

* reduce to a single observation per employer ID
bysort ${empid_var}: drop if _n > 1

* store the employer-to-industry lookup
prog_desc_sum_comp_save "${TEMP_DIR}/empid_ind_${year_min}_${year_max}`ext_str'.dta"


*** update largest connected set and AKM estimates data
* load largest connected set
use "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear

* merge in industry codes
merge m:1 ${empid_var} using "${TEMP_DIR}/empid_ind_${year_min}_${year_max}`ext_str'.dta", keepusing(${empid_var} ind07_5) keep(master match) nogen
rm "${TEMP_DIR}/empid_ind_${year_min}_${year_max}`ext_str'.dta"

* update previous AKM estimates with normalization separately for each ${categ_var}
* the employer FE is shifted to have mean zero in the industries listed in akm_norm_ind_list, and the worker FE is shifted by the opposite amount
forval g_loop = 0/$akm_by_categ {
	local g = `g_loop' + 1
	gen long ind07_5_temp = ind07_5
	local norm_ind_temp = "${akm_norm_ind_list}"
	// collapse repeated blanks so that the comma-separated list below has no empty entries
	local norm_ind_temp : list retokenize norm_ind_temp
	local norm_ind_temp_commas = subinstr("`norm_ind_temp'", " ", ",", .)
	local industry_match = 0
	// try the industry codes as given; if no observation matches, drop the last digit of both the data codes and the list codes and retry (at most 5 attempts)
	forval i = 1/5 {
		// e.g. CNAE 2.0 (2007) code 56112 = restaurants ("Restaurantes e outros estabelecimentos de serviços de alimentação e bebidas"). Source: https://cnae.ibge.gov.br/?view=classe&tipo=cnae&versao=5&classe=56112
		if $akm_by_categ sum fe if inlist(ind07_5_temp, `norm_ind_temp_commas') & ${categ_var} == `g', meanonly
		else sum fe if inlist(ind07_5_temp, `norm_ind_temp_commas'), meanonly
		if r(N) > 0 {
			local industry_match = 1
			local fe_norm_mean = r(mean)
			continue, break
		}
		replace ind07_5_temp = floor(ind07_5_temp/10)
		local norm_ind_temp_2 = "`norm_ind_temp'"
		local norm_ind_temp = ""
		// iterate over the individual codes: -of local- splits the list on blanks, whereas -in- with a quoted string would treat the whole list as one element
		foreach ind of local norm_ind_temp_2 {
			local norm_ind_temp_entry = floor(`ind'/10)
			if "`norm_ind_temp'" == "" local norm_ind_temp = "`norm_ind_temp_entry'"
			else local norm_ind_temp = "`norm_ind_temp' `norm_ind_temp_entry'"
		}
		local norm_ind_temp_commas = subinstr("`norm_ind_temp'", " ", ",", .)
	}
	if `industry_match' {
		if $akm_by_categ {
			replace fe = fe - `fe_norm_mean' if ${categ_var} == `g'
			replace pe = pe + `fe_norm_mean' if ${categ_var} == `g'
		}
		else {
			replace fe = fe - `fe_norm_mean'
			replace pe = pe + `fe_norm_mean'
		}
	}
	else {
		disp as error "USER ERROR: Failed to de-mean AKM worker FEs and employer FEs at industry level."
		error 1
	}
	drop ind07_5_temp
}
drop ind07_5

* save updated file with normalized AKM estimates
prog_desc_sum_comp_save "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta"

* save file with ${categ_var}-specific and total employment at each firm
* starts from the normalized largest connected set in memory; result has one row per employer-year (or per employer-year-category when AKM is run by category)
keep ${empid_var} year `categ_outsheet'
bys ${empid_var} year: gen long emp = _N
label var emp "Total employment"
if "`categ_outsheet'" == "${categ_var}" {
	bys ${empid_var} year ${categ_var}: gen long emp_g = _N
	label var emp_g "${categ_var}-specific employment"
	* spread the count for category 1 to all rows of the employer-year; 0 if the employer-year has no such worker
	* NOTE(review): the labels below assume category 1 = men and category 2 = women -- confirm when the category variable is not gender
	gen long emp_m = emp_g if ${categ_var} == 1
	bys ${empid_var} year (emp_m): replace emp_m = emp_m[1]
	replace emp_m = 0 if emp_m == .
	label var emp_m "Male employment"
	* same for category 2
	gen long emp_w = emp_g if ${categ_var} == 2
	bys ${empid_var} year (emp_w): replace emp_w = emp_w[1]
	replace emp_w = 0 if emp_w == .
	label var emp_w "Female employment"
	bys ${empid_var} ${categ_var} year: keep if _n == 1
}
else bys ${empid_var} year: keep if _n == 1
* summarize using the configured employer ID variable rather than a hard-coded name
if $akm_by_categ bys ${categ_var}: sum ${empid_var} emp_g emp_m emp_w
prog_desc_sum_comp_save "${TEMP_DIR}/emp`categ_ext'_${year_min}_${year_max}`ext_str'.dta"


*** summarize AKM components & compute variance decomposition
* optional AKM components that exist under the current specification
* xb_age is included only when age terms are estimated, because it is only created in that case
local xb_optional ""
if ${age_poly_order} local xb_optional "`xb_optional' xb_age"
if ${akm_hours} local xb_optional "`xb_optional' xb_hours"
if ${akm_occ} local xb_optional "`xb_optional' xb_occ"
if ${akm_tenure} local xb_optional "`xb_optional' xb_tenure"
if ${akm_exp_act} local xb_optional "`xb_optional' xb_exp_act"

* load largest connected set
use ${empid_var} ${categ_var} ${income_var} pe fe xb_year `xb_optional' using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear

* generate ${categ_var}-specific employer sizes (worker-year observations per employer divided by the number of years)
local N_years = ${year_max} - ${year_min} + 1
if $connect_by_categ bys ${empid_var} ${categ_var}: gen emp_g = _N
else bys ${empid_var}: gen emp_g = _N
replace emp_g = emp_g/`N_years'
label var emp_g "Mean number of employees of a given ${categ_var}"

* generate residual = income minus the sum of all estimated components
local xb_sum "pe + fe + xb_year"
foreach comp of local xb_optional {
	local xb_sum "`xb_sum' + `comp'"
}
gen double resid = ${income_var} - (`xb_sum')
label var resid "Predicted AKM ${categ_var}-specific residual"

* compute variance-covariance matrix and variance decomposition
* nothing in the loop below modifies the data, so no preserve/restore is needed
local cov_list "${income_var} pe fe xb_year `xb_optional' resid"
local min_size_emp_loop_list "1 5 10 15 25 50 100 250 500 1000"
forval g_loop = 0/$akm_by_categ {
	local g = `g_loop' + 1
	// category restriction shared by all commands of this pass (empty for the pooled estimation)
	if $akm_by_categ local categ_cond "${categ_var} == `g' &"
	else local categ_cond ""
	foreach min_size_emp_loop of local min_size_emp_loop_list {
		if `min_size_emp_loop' >= $min_size_emp {
			local sample_cond "`categ_cond' emp_g >= `min_size_emp_loop' & emp_g < ."
			disp _newline(5)
			if $akm_by_categ disp "* ${categ_var} = `g', loading data for employer size >= ${min_size_emp}, summarizing data for employer size >= `min_size_emp_loop'"
			else disp "* loading data for employer size >= ${min_size_emp}, summarizing data for employer size >= `min_size_emp_loop'"
			sum if `sample_cond'
			count if `sample_cond'
			if r(N) > 0 {
				corr `cov_list' if `sample_cond', cov
				matrix C = r(C)
				corr pe fe if `sample_cond'
				// variances are read off the diagonal and rounded to 3 decimals; the covariance term is income variance minus all component variances (including the residual)
				local cov_counter = 1
				foreach var of local cov_list {
					local var_`var' = C[`cov_counter',`cov_counter']
					local var_`var' : di %4.3f `var_`var''
					local ++cov_counter
					if "`var'" == "${income_var}" local cov = `var_`var''
					else local cov = `cov' - `var_`var''
				}
				// shares in percent of the income variance
				foreach var of local cov_list {
					local var_share_`var' = 100*`var_`var''/`var_${income_var}'
					local var_share_`var' : di %4.1f `var_share_`var''
				}
				local var_share_cov = 100*`cov'/`var_${income_var}'
				local var_share_cov : di %4.1f `var_share_cov'
				foreach var of local cov_list {
					if "`var'" != "resid" disp "Var(`var') = `var_`var'' (`var_share_`var''%)"
				}
				disp "2*sum(Cov(.)) = `cov' (`var_share_cov'%)"
				disp "Var(resid) = `var_resid' (`var_share_resid'%)"
			}
		}
	}
}


*** save AKM estimates separately
* worker FE file: one row per worker ID
use persid `categ_outsheet' pe if persid < . & pe < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
bysort persid: drop if _n > 1
compress
order persid `categ_outsheet' pe
sort persid `categ_outsheet' pe
prog_desc_sum_comp_save "${TEMP_DIR}/akm_pe`categ_ext'_${year_min}_${year_max}`ext_str'.dta"

* employer FE file: one row per employer ID (and category, when AKM is run by category)
use ${empid_var} `categ_outsheet' fe if ${empid_var} < . & fe < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
bysort ${empid_var} `categ_outsheet': drop if _n > 1
compress
order `categ_outsheet' ${empid_var} fe
sort `categ_outsheet' ${empid_var}
prog_desc_sum_comp_save "${TEMP_DIR}/akm_fe`categ_ext'_${year_min}_${year_max}`ext_str'.dta"
* a second copy goes to the results directory only if the hard-coded path below exists on this machine
capture confirm file "/Users/cm3594/"
if !_rc {
	prog_desc_sum_comp_save "${RESULTS_DIR}/akm_fe`categ_ext'_${year_min}_${year_max}`ext_str'.dta"
}

* save mean person effects, stored with one row per employer-year (and category, when AKM is run by category)
* NOTE(review): the mean is taken within employer only, pooling all years and categories, so it is constant across the rows kept for one employer -- confirm whether a mean by employer-year was intended
use ${empid_var} year `categ_outsheet' pe if ${empid_var} < . & pe < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
egen mean_pe = mean(pe), by(${empid_var})
drop pe
bysort ${empid_var} year `categ_outsheet': drop if _n > 1
compress
order `categ_outsheet' ${empid_var} year mean_pe
sort `categ_outsheet' ${empid_var} year
prog_desc_sum_comp_save "${TEMP_DIR}/akm_meanpe`categ_ext'_${year_min}_${year_max}`ext_str'.dta"


* year effects file: one row per year (and per category / education level, when those dimensions are switched on)
use year `categ_outsheet' `edu_outsheet' xb_year if year < . & xb_year < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
bysort year `categ_outsheet' `edu_outsheet': drop if _n > 1
compress
order `categ_outsheet' `edu_outsheet' year xb_year
sort `categ_outsheet' `edu_outsheet' year
prog_desc_sum_comp_save "${TEMP_DIR}/akm_xb_year`categ_ext'_${year_min}_${year_max}`ext_str'.dta"

* the remaining files (age FE or higher-order age terms / hours FE / occupation FE / tenure FE / actual-experience FE) are written only for components that were estimated
* age effects file: one row per age (and per category / education level)
if ${age_poly_order} {
	use age `categ_outsheet' `edu_outsheet' xb_age if age < . & xb_age < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
	bysort age `categ_outsheet' `edu_outsheet': drop if _n > 1
	compress
	order `categ_outsheet' `edu_outsheet' age xb_age
	sort `categ_outsheet' `edu_outsheet' age
	prog_desc_sum_comp_save "${TEMP_DIR}/akm_xb_age`categ_ext'_${year_min}_${year_max}`ext_str'.dta"
}

* hours effects file: one row per hours value (and category)
if ${akm_hours} {
	use hours `categ_outsheet' xb_hours if hours < . & xb_hours < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
	bysort hours `categ_outsheet': drop if _n > 1
	compress
	order `categ_outsheet' hours xb_hours
	sort `categ_outsheet' hours
	prog_desc_sum_comp_save "${TEMP_DIR}/akm_xb_hours`categ_ext'_${year_min}_${year_max}`ext_str'.dta"
}

* occupation effects file: one row per occupation code (and category)
if ${akm_occ} {
	use occ02_6 `categ_outsheet' xb_occ if occ02_6 < . & xb_occ < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
	bysort occ02_6 `categ_outsheet': drop if _n > 1
	compress
	order `categ_outsheet' occ02_6 xb_occ
	sort `categ_outsheet' occ02_6
	prog_desc_sum_comp_save "${TEMP_DIR}/akm_xb_occ`categ_ext'_${year_min}_${year_max}`ext_str'.dta"
}

* tenure effects file: one row per tenure value (and per category / education level)
if ${akm_tenure} {
	use tenure `categ_outsheet' `edu_outsheet' xb_tenure if tenure < . & xb_tenure < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
	bysort tenure `categ_outsheet' `edu_outsheet': drop if _n > 1
	compress
	order `categ_outsheet' `edu_outsheet' tenure xb_tenure
	sort `categ_outsheet' `edu_outsheet' tenure
	prog_desc_sum_comp_save "${TEMP_DIR}/akm_xb_tenure`categ_ext'_${year_min}_${year_max}`ext_str'.dta"
}

* actual-experience effects file: one row per experience value (and per category / education level)
if ${akm_exp_act} {
	use exp_act `categ_outsheet' `edu_outsheet' xb_exp_act if exp_act < . & xb_exp_act < . using "${TEMP_DIR}/lset`categ_ext'_${year_min}_${year_max}`ext_str'.dta", clear
	bysort exp_act `categ_outsheet' `edu_outsheet': drop if _n > 1
	compress
	order `categ_outsheet' `edu_outsheet' exp_act xb_exp_act
	sort `categ_outsheet' `edu_outsheet' exp_act
	prog_desc_sum_comp_save "${TEMP_DIR}/akm_xb_exp_act`categ_ext'_${year_min}_${year_max}`ext_str'.dta"
}


*** final housekeeping
* close the named log opened at the top of this file
log close akm
